from factscorer import FactScorer
import json
import os
from dotenv import load_dotenv
import argparse


# Optional allow-list of entities; see the commented-out filter in collect_generations().
# matching_entities = ['Suthida', 'Mihai Eminescu', 'Matthew Perry', 'Henryk Wieniawski', 'Heinrich Himmler', 'Kubota Beisen', 'Arthur Ewert', 'Matt Hunter (singer)', 'Salome Maswime', 'Rory Burns', 'Andriy Yarmolenko', 'Danny Faure', 'Ilhan Omar', 'Joey Jordison', 'Ayman al-Zawahiri', 'Diego Suarez (garden designer)', 'Muhammad Qutb', 'Daniel Alexander Cameron', 'Paul Anka', 'Grayston Burgess', 'Thomas Piketty', 'Ki Fitzgerald', 'Charmion King', 'Raffaele Maiello', 'Chacho Gaytán', 'Noel Malicdem', 'Kerwin Bell', 'Jorge Enríquez', 'Yordanka Donkova', 'Winston Churchill', 'Muhammad Ali Jinnah', 'Sada Thompson', 'Bobby Fischer', 'Vance Joy', 'Ahsee Tuala', 'Don DeLillo', 'Maddy Crippen', 'Thomas Jack', 'Alf Garland', 'Naz Mitrou-Long', 'Julie Bishop', 'Lina Marulanda']


def extract_topic(prompt):
    """Return the entity name embedded in a prompt string.

    The text after the first standalone " of " is taken as the entity,
    surrounding whitespace is stripped, and a single trailing period is
    dropped.  If the prompt contains no " of ", the whole prompt is used.

    Matching the word " of " (rather than the bare substring "of") keeps
    names such as "Sofia Coppola" intact, and removing only the final
    period keeps names with inner periods such as "J. K. Rowling" intact.

    NOTE(review): assumes prompts look like "... of <entity>." -- confirm
    against the dataset that produces the 'Prompt' field.

    Args:
        prompt: The prompt text of one dataset record.

    Returns:
        The extracted entity name with no leading/trailing whitespace.
    """
    _, marker, remainder = prompt.partition(" of ")
    topic = (remainder if marker else prompt).strip()
    if topic.endswith("."):
        topic = topic[:-1].rstrip()
    return topic


def collect_generations(dataset):
    """Flatten dataset records into parallel topic / generation lists.

    Args:
        dataset: Iterable of mappings, each with a 'Prompt' string and a
            'Responses' iterable.

    Returns:
        A ``(topics, generations)`` tuple of equal-length lists; the topic
        of a record is repeated once for each of its responses.
    """
    topics, generations = [], []
    for data in dataset:
        topic = extract_topic(data['Prompt'])
        # if topic not in matching_entities:
        #     continue
        for response in data['Responses']:
            topics.append(topic)
            generations.append(response)
    return topics, generations


def main():
    """Parse the command line, load the dataset and score every response."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-dp", "--data_path", type=str, default="../data/gen_factscore.json", help="Path to the dataset")
    parser.add_argument("-rp", "--result_path", type=str, default="../res/res_factscore.json", help="Path to the output file")
    args = parser.parse_args()

    # load_dotenv() is called before the key is read from the environment.
    load_dotenv()
    api_key = os.getenv("OPENAI_API_KEY")
    fs = FactScorer(openai_key=api_key)

    # Explicit UTF-8 so non-ASCII entity names decode the same on every
    # platform instead of depending on the locale's default encoding.
    with open(args.data_path, "r", encoding="utf-8") as f:
        dataset = json.load(f)

    topics, generations = collect_generations(dataset)
    fs.get_score(topics, generations, result_path=args.result_path)


if __name__ == "__main__":
    main()